1 Load Packages

if (!require("knitr")) {install.packages("knitr"); require("knitr")}
if (!requireNamespace('BiocManager', quietly = TRUE)) {install.packages('BiocManager'); require("BiocManager")}
if (!require("dplyr")) {install.packages("dplyr"); require("dplyr")}
if (!require("stringr")) {install.packages("stringr"); require("stringr")}
if (!require("Seurat")) {install.packages("Seurat"); require("Seurat")}
if (!require("sctransform")) {install.packages("sctransform"); require("sctransform")}
if (!require("glmGamPoi")) {BiocManager::install('glmGamPoi'); require("glmGamPoi")}
if (!require("patchwork")) {install.packages("patchwork"); require("patchwork")}
if (!require("ggplot2")) {install.packages("ggplot2"); require("ggplot2")}
if (!require("EnhancedVolcano")) {BiocManager::install('EnhancedVolcano'); require("EnhancedVolcano")}
if (!require("DESeq2")) {BiocManager::install('DESeq2'); require("DESeq2")}
if (!require("tidyverse")) {install.packages("tidyverse"); require("tidyverse")}
if (!require("RColorBrewer")) {install.packages("RColorBrewer"); require("RColorBrewer")}
if (!require("car")) {install.packages("car"); require("car")}
if (!require("openxlsx")) {install.packages("openxlsx"); require("openxlsx")}
if (!require("readxl")) {install.packages("readxl"); require("readxl")}
if (!require("ggrepel")) {install.packages("ggrepel"); require("ggrepel")}
if (!require("gghighlight")) {install.packages("gghighlight"); require("gghighlight")}
if (!require("ggpmisc")) {install.packages("ggpmisc"); require("ggpmisc")}
if (!require("data.table")) {install.packages("data.table"); require("data.table")}
if (!require("here")) {install.packages("here"); require("here")}
if (!require("NatParksPalettes")) {install.packages("NatParksPalettes"); require("NatParksPalettes")}
if (!require("svglite")) {install.packages("svglite"); require("svglite")}
if (!require("ggvenn")) {install.packages("ggvenn"); require("ggvenn")}
if (!require("kableExtra")) {install.packages("kableExtra"); require("kableExtra")} # for color brewer

here()
## [1] "/Users/emberley/Documents/PIPseq/LIB241121BE/R files"
set.seed(2469)

2 Helpful Objects

# Color palette for PIPseq that coordinates with Yellowstone for clusters
cluster_colors<- c("#0067A2", "#CC782B", "#5A8D66", "#509EA0", "#71A2B8", "#8A9BA9", "#B46DB3", "black")

# Color palette for replicates -- adapted from the Torres colors
replicate_colors <- c( "#7391BD" ,"#894846" ,"#E9988C" ,"#535260", "#B7A7A6" ,"#785838", "#C68D61" ,"#93995C")

# Color pallete for "disordered" sample_ID (in order of lexicon, 1, 10-16, 2-9)
sample_colors_d <- c("#C00000", "#FF8082", "#BFA85F", "#CCCA79", "#799E51", "#448A64", 
  "#568BB1", "#775790", "#FF0000", "#FFC000", "#F5EF1E", "#92D050", 
  "#00B050", "#0070C0", "#7030A0", "#D08B8C")

# Group color pallet
age_colors <- c("#BFA85F", "purple")

# genotype color pallet
genotype_colors <- c("orange", "blue")

3 Increase Globals

options(future.globals.maxSize = 74 * 1024^3) # 55 GB
getOption("future.globals.maxSize") #59055800320
## [1] 79456894976

4 Load Object

seu.obj<-readRDS(here("3-ON_Combined_Integration", "Intermediate_Objects", "3_ON_Combined_integrated_36dims_UMAP_renamed.rds"))

5 Psuedobulk Object

5.1 Group the cells and aggregate

Group by the cell type and samples

pbo <- AggregateExpression(seu.obj, 
                           group.by = c("sample_ID"), # CHANGE the column with both condition and sample ID
                           assays = 'RNA', 
                           slot = "counts",
                           return.seurat = FALSE)

5.2 Extract the counts

We will get a huge matrix with columns as the cell_samples and rows as genes

pbo <- pbo$RNA
pbo[1:16, 1:16]
## 16 x 16 sparse Matrix of class "dgCMatrix"
##                                                                         
## Xkr4    1.005065e+04 8531.7380157 28428.886737 10604.305657 27195.171305
## Gm1992  3.513907e+01   28.8691427   318.077866    69.204201    66.358061
## Gm19938 9.863506e+02  763.3925108  2943.304557  1162.562618  2349.565503
## Gm37381 8.609018e+01   44.3063990    90.245531    79.863555    93.681319
## Rp1     2.107700e+02  209.2694956   264.376969   342.601798   310.857194
## Sox17   1.398350e+01   10.8556093    22.217328     .           16.000000
## Gm37587 9.755971e-01    1.0000000     2.000000     1.909418     .       
## Gm37323 .               .             3.000000     .            8.000000
## Mrpl15  3.440870e+02  659.7764855  1292.785611   861.924570  1169.309141
## Lypla1  1.429060e+03 2108.9332342  3418.550371  2491.205507  4511.441558
## Tcea1   2.190187e+03 3266.8279613  5966.326153  3865.592403  6579.825801
## Rgs20   5.308359e+03 8267.0689440  9220.293812  6291.602598 28530.719980
## Gm16041 1.000000e+00    0.9848024     4.000000     1.000000     3.000000
## Atp6v1h 1.720426e+03 2684.6824962  6535.562807  4591.969173  6627.277939
## Oprk1   1.976713e+00    9.8252999     5.227619     2.348087     1.785231
## Npbwr1  .               .             .            1.000000     1.000000
##                                                                       
## Xkr4    1.429128e+04 2.221777e+04 35199.592124 7899.357666 7985.941494
## Gm1992  4.176587e+01 6.013632e+02   322.646652   23.794623   73.545658
## Gm19938 1.337453e+03 2.187208e+03  3721.558003  797.973781  839.914269
## Gm37381 1.082198e+02 7.468100e+01   250.442379   79.223174   59.028343
## Rp1     3.411909e+02 3.164240e+02  2040.391228  198.240008  104.395270
## Sox17   7.370645e+01 1.731185e+02    32.747574   16.000000    4.000000
## Gm37587 3.000000e+00 3.988385e+00     1.872795    1.000000    1.000000
## Gm37323 2.879465e+00 2.000000e+00     4.000000    2.000000    .       
## Mrpl15  7.694570e+02 1.011745e+03  1044.521467  284.746009  264.083560
## Lypla1  2.814141e+03 3.543669e+03  3427.365583 1411.607888  906.369981
## Tcea1   3.989301e+03 5.456729e+03  4930.747053 1898.709044 1415.798304
## Rgs20   1.191737e+04 1.044019e+04  6428.747631 3225.760061 2477.942463
## Gm16041 9.815164e-01 9.164695e-01     1.000000    .           .       
## Atp6v1h 4.310653e+03 5.418279e+03  6693.811154 1953.660254 1498.061720
## Oprk1   4.849232e+00 4.903818e+00     5.454435    2.827486    1.974671
## Npbwr1  1.965453e+00 2.925962e+00     .           .           .       
##                                                                       
## Xkr4    4617.3599233 5416.475418 6444.1838137 8735.2728291 8770.040917
## Gm1992    19.9922997   20.674153   30.1033969  159.0593248   99.398404
## Gm19938  493.5839763  449.720121  679.1159401  767.9078366 1034.660216
## Gm37381   63.5715425   61.499140   70.0959505   61.5985225   95.941301
## Rp1      159.1010489  123.826772  177.6726989  176.5307113  568.234546
## Sox17      .            9.980129    0.9870518    7.0000000    4.995495
## Gm37587    0.9890630    .           2.0000000    1.0000000    2.000000
## Gm37323    1.0000000    2.000000    1.0000000    0.9866936    .       
## Mrpl15   314.0395904  236.280570  330.7964683  307.1836364  261.155023
## Lypla1  1003.6451369  877.646450 1275.5648670 1049.9732704  908.769311
## Tcea1   1475.2332515 1355.550575 1939.2613952 1685.3659795 1303.841173
## Rgs20   1943.0127646 3939.503522 3227.6199132 3018.8145235 1613.205682
## Gm16041    .            .           .            1.0000000    .       
## Atp6v1h 1947.4733537 1218.252027 1859.9208987 1597.0949452 1623.701798
## Oprk1      0.9860042    3.986753    0.9378364    4.9447968    .       
## Npbwr1     .            .           .            .            .       
##                     
## Xkr4    1.309683e+04
## Gm1992  3.609525e+01
## Gm19938 1.299005e+03
## Gm37381 6.380994e+01
## Rp1     2.434054e+02
## Sox17   1.095859e+01
## Gm37587 .           
## Gm37323 5.000000e+00
## Mrpl15  6.410937e+02
## Lypla1  2.644889e+03
## Tcea1   3.985017e+03
## Rgs20   1.462818e+04
## Gm16041 2.000000e+00
## Atp6v1h 2.516210e+03
## Oprk1   1.232708e-01
## Npbwr1  9.606854e-01
colnames(pbo)[1:16]
##  [1] "g1-yWT10-a"  "g10-aWT10-b" "g11-y10-b"   "g12-a10-b"   "g13-yWT20-b"
##  [6] "g14-aWT20-b" "g15-y20-b"   "g16-a20-b"   "g2-aWT10-a"  "g3-y10-a"   
## [11] "g4-a10-a"    "g5-yWT20-a"  "g6-aWT20-a"  "g7-y20-a"    "g8-a20-a"   
## [16] "g9-yWT10-b"

5.3 Add Metadata

This allows you to add whichever components of metadata you’d like to include to graph you PCA data by and whichever components you want to design the DESeq object from.

metadata <- seu.obj@meta.data %>%
  as.data.frame() %>%
  dplyr::select(sample_ID, replicate, age_group, genotype, day_nuclei_isolated) # CHANGE These correspond to the column names of the metadata you want to extract 

dim(metadata)
## [1] 118550      5
head(metadata)
# Exclude duplicated rows 
metadata <- metadata[!duplicated(metadata), ]

dim(metadata)
## [1] 16  5
metadata
# Rename rows to represent the sample IDs rather than the cell barcode IDs
rownames(metadata) <- metadata$sample_ID
head(metadata)
# This enusres the columns match up between pbo and the metadata.
sample_names <- unique(seu.obj@meta.data$sample_ID) # CHANGE to be the column that you have for your samples
sample_names
##  [1] "1_yWT10_a"  "9_yWT10_b"  "2_aWT10_a"  "10_aWT10_b" "3_y10_a"   
##  [6] "11_y10_b"   "4_a10_a"    "12_a10_b"   "5_yWT20_a"  "13_yWT20_b"
## [11] "6_aWT20_a"  "14_aWT20_b" "7_y20_a"    "15_y20_b"   "8_a20_a"   
## [16] "16_a20_b"
colnames(pbo) <- sample_names

5.4 Make DESeq Dataset

When making the DESeq dataset, make sure the countData have the exact same column names in the exacts same order as the rows of metadata. You get to choose your design for what the model will base it’s sample matrix on. Select one or more of your metadata columns.

dds <- DESeqDataSetFromMatrix(countData = round(pbo), # Integers must be used for this matrix. 
                              colData = metadata,
                              design = ~ sample_ID) # CHANGE I am choosing to use sample ID rather than replicate here as we are trying to identify whether or not y20icKO batch differences will be an issue.
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

5.5 Run PCA analyses

# Transform counts for data visualization
rld <- rlog(dds, blind=TRUE)

5.5.1 Plot PCA analyses

Change this section to customize based on which metadata you would like plotted. Note, this section includes several customizations that are user specific based on metadata and color preferences.

sample_names <- unique(rld$sample_ID)

# Plot PCA
sample<-DESeq2::plotPCA(rld, ntop = 1000, intgroup = "sample_ID") + 
  scale_color_manual(values = sample_colors_d)+ 
  geom_text_repel(aes(label = sample_names))+
  coord_fixed(ratio = 2.5) +  # Aspect ratio of 2 means the x-axis is twice as long as the y-axis
  theme_minimal()
## using ntop=1000 top features by variance
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
replicate<-DESeq2::plotPCA(rld, ntop = 1000, intgroup = "replicate") + 
  scale_color_manual(values = replicate_colors)+
  coord_fixed(ratio = 2.5) +  # Aspect ratio of 2 means the x-axis is twice as long as the y-axis
  theme_minimal()
## using ntop=1000 top features by variance
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
age<-DESeq2::plotPCA(rld, ntop = 1000, intgroup = "age_group") + 
  scale_color_manual(values = age_colors)+
  coord_fixed(ratio = 2.5) +  # Aspect ratio of 2 means the x-axis is twice as long as the y-axis
  theme_minimal()
## using ntop=1000 top features by variance
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
genotype<-DESeq2::plotPCA(rld, ntop = 1000, intgroup = "genotype") + 
  scale_color_manual(values = genotype_colors)+
  coord_fixed(ratio = 2.5) +  # Aspect ratio of 2 means the x-axis is twice as long as the y-axis
  theme_minimal()
## using ntop=1000 top features by variance
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
batch<-DESeq2::plotPCA(rld, ntop = 1000, intgroup = "day_nuclei_isolated") + 
  scale_color_manual(values = c("pink", "darkgreen"))+
  coord_fixed(ratio = 2.5) +  # Aspect ratio of 2 means the x-axis is twice as long as the y-axis
  theme_minimal()
## using ntop=1000 top features by variance
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
plot(sample)

plot(replicate)

plot(age)

plot(genotype)

plot(batch)

# Save the plots
folder<-here("4-Analysis", "PCA")

ggsave("PCA by Sample.svg", sample, path = folder)
## Saving 7 x 5 in image
ggsave("PCA by Replicate.svg", replicate, path = folder)
## Saving 7 x 5 in image
ggsave("PCA by Age.svg", age, path = folder)
## Saving 7 x 5 in image
ggsave("PCA by Genotype.svg", genotype, path = folder)
## Saving 7 x 5 in image
ggsave("PCA by Batch.svg", batch, path = folder)
## Saving 7 x 5 in image

6 Run PCA on a per cluster basis

7 Psuedobulk Object

7.1 Group the cells and aggregate

Group by the cell type and samples

pbo <- AggregateExpression(seu.obj, 
                           group.by = c("fourth_label",     # CHANGE the column with the cell type information
                                        "sample_ID"), # CHANGE the column with both condition and sample ID
                           assays = 'RNA', 
                           slot = "counts",
                           return.seurat = FALSE)

7.2 Extract the counts

We will get a huge matrix with columns as the cell_samples and rows as genes

pbo <- pbo$RNA
pbo[1:16, 1:16]
## 16 x 16 sparse Matrix of class "dgCMatrix"
##                                                                    
## Xkr4    1093.643289 2125.9808528 5571.40110 2910.169931 3766.602304
## Gm1992     3.877677    .           19.45336    4.200044    5.408629
## Gm19938  111.156146  216.8727370  598.49970  308.845365  393.883635
## Gm37381    4.846212    4.8585340   10.48722    7.847197    1.918910
## Rp1        6.569944    8.3600801   18.91743   19.067562    9.648479
## Sox17      .           .            .          .           .       
## Gm37587    .           .            .          .           .       
## Gm37323    .           .            .          .           .       
## Mrpl15    14.611873   40.7274177   98.04053   45.053542   40.795150
## Lypla1    44.798776  106.5570993  307.61697  165.623627  165.189108
## Tcea1     89.488122  161.2983765  506.50764  280.259347  219.548140
## Rgs20    424.999744  830.0713778 2295.28198 1721.041622 1339.272437
## Gm16041    .           .            .          .           .       
## Atp6v1h   65.013997  145.2898776  526.59777  249.748189  296.068460
## Oprk1      .           0.9397222    .          .           .       
## Npbwr1     .           .            .          .           .       
##                                                                       
## Xkr4    2730.3073634 2702.2570366 3306.953565 1181.6350574 1202.997635
## Gm1992     2.7271025    4.8565451    7.025408    0.9276348    5.830499
## Gm19938  290.5129972  289.6803223  350.299947  127.3282374  154.016181
## Gm37381    7.6221420    2.4827228    5.935597    5.8677618    4.866207
## Rp1        9.5649033   13.7432519   15.844332    5.3997943   10.416664
## Sox17      0.9370237    .            .           .            .       
## Gm37587    .            .            .           .            .       
## Gm37323    .            .            .           .            .       
## Mrpl15    29.7736864   46.2695788   69.347516    8.6552010   10.513704
## Lypla1   135.5127066  115.5226059  155.107204   71.9869395   45.342514
## Tcea1    181.1124276  168.9494111  269.699791   84.4621687  116.605616
## Rgs20   1012.5453157  902.7047437 1281.164265  472.4927685  483.700764
## Gm16041    .            .            .           .            .       
## Atp6v1h  194.8605804  235.4907587  354.307211  117.7060163  124.741939
## Oprk1      .            0.6732986    .           .            .       
## Npbwr1     .            .            .           .            .       
##                                                                              
## Xkr4    1484.828910 613.6564737 899.021739 807.462087 877.860815 1609.6961828
## Gm1992     1.943508   .           1.951583   .          1.936774    4.9111963
## Gm19938  147.332860  66.7199297 102.629435  89.410523  95.282660  199.6115093
## Gm37381    8.831114   2.9294703   4.909370   3.939778   3.794515    0.8046319
## Rp1        9.250730   4.7585909   2.863175   4.633544   3.380324   16.2984510
## Sox17      .          .           .          .          .           .        
## Gm37587    .          .           .          .          1.000000    .        
## Gm37323    .          .           .          .          .           .        
## Mrpl15    34.386934  15.8584266  11.722197   5.831669  18.723732   18.1360671
## Lypla1    90.293666  21.6056832  36.373197  35.047288  44.998423   89.5033630
## Tcea1    152.134523  48.3833076  67.031763  59.603066  74.745816  117.0104168
## Rgs20    897.275506 232.5197115 404.637173 280.463710 386.136917  604.2367263
## Gm16041    .          .           .          .          .           .        
## Atp6v1h  160.097635  65.0372443  65.495332  52.148529  82.758164   82.1776048
## Oprk1      .          0.9988837   .          .          .           .        
## Npbwr1     .          .           .          .          .           .
colnames(pbo)[1:16]
##  [1] "OPC_1-yWT10-a"  "OPC_10-aWT10-b" "OPC_11-y10-b"   "OPC_12-a10-b"  
##  [5] "OPC_13-yWT20-b" "OPC_14-aWT20-b" "OPC_15-y20-b"   "OPC_16-a20-b"  
##  [9] "OPC_2-aWT10-a"  "OPC_3-y10-a"    "OPC_4-a10-a"    "OPC_5-yWT20-a" 
## [13] "OPC_6-aWT20-a"  "OPC_7-y20-a"    "OPC_8-a20-a"    "OPC_9-yWT10-b"

7.2.1 Data transformation

  • Transpose the columns and rows
  • Convert to data.frame
pbo.t <- t(pbo)
pbo.t <- as.data.frame(pbo.t)

Check the structure of the data frame

pbo.t[1:10, 1:10]

Split the rows (cell_samples) to remove the sample ID and only keep cell type information in the data frame - Then we will get a vector containing the cell types, which will be used to split the data frame as a factor variable

# matching anything that's after the '_' and replace it with nothing (removing it)
splitRows <- gsub('_.*', '', rownames(pbo.t)) 
splitRows
##  [1] "OPC"          "OPC"          "OPC"          "OPC"          "OPC"         
##  [6] "OPC"          "OPC"          "OPC"          "OPC"          "OPC"         
## [11] "OPC"          "OPC"          "OPC"          "OPC"          "OPC"         
## [16] "OPC"          "Oligolineage" "Oligolineage" "Oligolineage" "Oligolineage"
## [21] "Oligolineage" "Oligolineage" "Oligolineage" "Oligolineage" "Oligolineage"
## [26] "Oligolineage" "Oligolineage" "Oligolineage" "Oligolineage" "Oligolineage"
## [31] "Oligolineage" "Oligolineage" "Microglia"    "Microglia"    "Microglia"   
## [36] "Microglia"    "Microglia"    "Microglia"    "Microglia"    "Microglia"   
## [41] "Microglia"    "Microglia"    "Microglia"    "Microglia"    "Microglia"   
## [46] "Microglia"    "Microglia"    "Microglia"    "Astrocyte"    "Astrocyte"   
## [51] "Astrocyte"    "Astrocyte"    "Astrocyte"    "Astrocyte"    "Astrocyte"   
## [56] "Astrocyte"    "Astrocyte"    "Astrocyte"    "Astrocyte"    "Astrocyte"   
## [61] "Astrocyte"    "Astrocyte"    "Astrocyte"    "Astrocyte"    "Vascular"    
## [66] "Vascular"     "Vascular"     "Vascular"     "Vascular"     "Vascular"    
## [71] "Vascular"     "Vascular"     "Vascular"     "Vascular"     "Vascular"    
## [76] "Vascular"     "Vascular"     "Vascular"     "Vascular"     "Vascular"    
## [81] "KOOL"         "KOOL"         "KOOL"         "KOOL"         "KOOL"        
## [86] "KOOL"         "KOOL"         "KOOL"         "KOOL"         "KOOL"        
## [91] "KOOL"         "KOOL"         "KOOL"         "KOOL"         "KOOL"

Split data.frame using the splitRows vector - It will return a list with each element corresponding with a cell type - Having data in such a list object makes it convenient to fetch matrices for any cell type in the data

pbo.split <- split.data.frame(pbo.t,
                              f = factor(splitRows))

Check the matrix - using OPC as an example here

# rows as cell_sample ID and columns as genes
pbo.split$"OPC"[1:6, 1:6] # CHANGE 

Fix colnames and transpose it back to columns as the cell_samples and rows as genes

# use 'lapply' to remove the cell type and only leave the sample ID, and also re-transpose the matrix 
pbo.split.modified <- lapply(pbo.split, function(x){ 
  rownames(x) <- gsub('.*_(.*)', '\\1', rownames(x)) # remove the cell type name from the row name
  t(x)
  
})

Check if you have performed the changes

pbo.split.modified$"OPC"[1:6, 1:6] # CHANGE 
##           1-yWT10-a  10-aWT10-b   11-y10-b    12-a10-b  13-yWT20-b   14-aWT20-b
## Xkr4    1093.643289 2125.980853 5571.40110 2910.169931 3766.602304 2730.3073634
## Gm1992     3.877677    0.000000   19.45336    4.200044    5.408629    2.7271025
## Gm19938  111.156146  216.872737  598.49970  308.845365  393.883635  290.5129972
## Gm37381    4.846212    4.858534   10.48722    7.847197    1.918910    7.6221420
## Rp1        6.569944    8.360080   18.91743   19.067562    9.648479    9.5649033
## Sox17      0.000000    0.000000    0.00000    0.000000    0.000000    0.9370237
pbo.split.modified$"Oligolineage"[1:6, 1:6] # CHANGE 
##          1-yWT10-a   10-aWT10-b     11-y10-b   12-a10-b  13-yWT20-b  14-aWT20-b
## Xkr4    8289.87520 5237.2574416 1.606653e+04 5360.84392 20147.50036 9807.792751
## Gm1992    17.90879   13.6822858 4.580000e+01   10.47649    32.95969   24.798255
## Gm19938  784.05682  431.2694088 1.612192e+03  607.25261  1643.64790  895.846572
## Gm37381   54.43715   15.0094934 3.193915e+01   13.87819    52.27220   51.836951
## Rp1      123.25262   70.4733464 6.223708e+01  148.36692   127.60106   95.831363
## Sox17      0.00000    0.9287855 6.012309e-01    0.00000     0.00000    1.889099
# List of clusters
clusters <- c("OPC", "Oligolineage", "Microglia", "Astrocyte", "Vascular")

# Loop over each cluster
for (cluster in clusters) {
  
  # Dynamically access the corresponding pbo.split.modified object (pbo.split.modified$<cluster>)
  pbo <- pbo.split.modified[[cluster]]
  
  # Ensure the columns of pbo match the sample names from your metadata
  sample_names <- unique(seu.obj@meta.data$sample_ID)
  colnames(pbo) <- sample_names

  # Create DESeq2 dataset
  dds <- DESeqDataSetFromMatrix(countData = round(pbo), 
                                colData = metadata,
                                design = ~ sample_ID)  # Change design formula if needed
  
  # Run PCA (Rlog transformation)
  rld <- rlog(dds, blind = TRUE)
  
  # Plot PCA by different metadata (you can customize the plots)
  sample_plot <- DESeq2::plotPCA(rld, ntop = 1000, intgroup = "sample_ID") + 
    scale_color_manual(values = sample_colors_d) + 
    geom_text_repel(aes(label = sample_names)) + 
    theme_minimal() +
    ggtitle(paste(cluster, "- sample_plot"))

  replicate_plot <- DESeq2::plotPCA(rld, ntop = 1000, intgroup = "replicate") + 
    scale_color_manual(values = replicate_colors) + 
    theme_minimal() +
    ggtitle(paste(cluster, "- replicate_plot"))

  age_plot <- DESeq2::plotPCA(rld, ntop = 1000, intgroup = "age_group") + 
    scale_color_manual(values = age_colors) + 
    theme_minimal() +
    ggtitle(paste(cluster, "- age_plot"))

  genotype_plot <- DESeq2::plotPCA(rld, ntop = 1000, intgroup = "genotype") + 
    scale_color_manual(values = genotype_colors) + 
    theme_minimal() +
    ggtitle(paste(cluster, "- genotype_plot"))

  batch_plot <- DESeq2::plotPCA(rld, ntop = 1000, intgroup = "day_nuclei_isolated") + 
    scale_color_manual(values = c("pink", "darkgreen")) + 
    theme_minimal() +
    ggtitle(paste(cluster, "- batch_plot"))

  # Print each plot to the console
  plot(sample_plot)
  plot(replicate_plot)
  plot(age_plot)
  plot(genotype_plot)
  plot(batch_plot)
  
  # Save the plots
  folder <- here("4-Analysis", "PCA")
  ggsave(paste0("PCA_", cluster, "_by_Sample.svg"), sample_plot, path = folder)
  ggsave(paste0("PCA_", cluster, "_by_Replicate.svg"), replicate_plot, path = folder)
  ggsave(paste0("PCA_", cluster, "_by_Age.svg"), age_plot, path = folder)
  ggsave(paste0("PCA_", cluster, "_by_Genotype.svg"), genotype_plot, path = folder)
  ggsave(paste0("PCA_", cluster, "_by_Batch.svg"), batch_plot, path = folder)

  # Optionally, print the current cluster being processed
  print(paste("Processing cluster:", cluster))
}
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance

## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## [1] "Processing cluster: OPC"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance

## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## [1] "Processing cluster: Oligolineage"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance

## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## [1] "Processing cluster: Microglia"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance

## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## [1] "Processing cluster: Astrocyte"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance
## using ntop=1000 top features by variance

## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## [1] "Processing cluster: Vascular"

8 Session Info

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS 15.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices datasets  utils    
## [8] methods   base     
## 
## other attached packages:
##  [1] kableExtra_1.4.0            ggvenn_0.1.10              
##  [3] svglite_2.1.3               NatParksPalettes_0.2.0     
##  [5] here_1.0.1                  data.table_1.16.0          
##  [7] ggpmisc_0.6.0               ggpp_0.5.8-1               
##  [9] gghighlight_0.4.1           readxl_1.4.3               
## [11] openxlsx_4.2.7              car_3.1-2                  
## [13] carData_3.0-5               RColorBrewer_1.1-3         
## [15] lubridate_1.9.3             forcats_1.0.0              
## [17] purrr_1.0.2                 readr_2.1.5                
## [19] tidyr_1.3.1                 tibble_3.2.1               
## [21] tidyverse_2.0.0             DESeq2_1.44.0              
## [23] SummarizedExperiment_1.34.0 Biobase_2.64.0             
## [25] MatrixGenerics_1.16.0       matrixStats_1.4.1          
## [27] GenomicRanges_1.56.1        GenomeInfoDb_1.40.1        
## [29] IRanges_2.38.1              S4Vectors_0.42.1           
## [31] BiocGenerics_0.50.0         EnhancedVolcano_1.22.0     
## [33] ggrepel_0.9.6               ggplot2_3.5.1              
## [35] patchwork_1.3.0             glmGamPoi_1.16.0           
## [37] sctransform_0.4.1           Seurat_5.1.0               
## [39] SeuratObject_5.0.2          sp_2.1-4                   
## [41] stringr_1.5.1               dplyr_1.1.4                
## [43] knitr_1.48                 
## 
## loaded via a namespace (and not attached):
##   [1] RcppAnnoy_0.0.22        splines_4.4.1           later_1.3.2            
##   [4] cellranger_1.1.0        polyclip_1.10-7         fastDummies_1.7.4      
##   [7] lifecycle_1.0.4         rprojroot_2.0.4         globals_0.16.3         
##  [10] lattice_0.22-6          MASS_7.3-61             magrittr_2.0.3         
##  [13] plotly_4.10.4           sass_0.4.9              rmarkdown_2.28         
##  [16] jquerylib_0.1.4         yaml_2.3.10             httpuv_1.6.15          
##  [19] spam_2.10-0             zip_2.3.1               spatstat.sparse_3.1-0  
##  [22] reticulate_1.39.0       cowplot_1.1.3           pbapply_1.7-2          
##  [25] abind_1.4-8             zlibbioc_1.50.0         Rtsne_0.17             
##  [28] GenomeInfoDbData_1.2.12 irlba_2.3.5.1           listenv_0.9.1          
##  [31] spatstat.utils_3.1-0    MatrixModels_0.5-3      goftest_1.2-3          
##  [34] RSpectra_0.16-2         spatstat.random_3.3-2   fitdistrplus_1.2-1     
##  [37] parallelly_1.38.0       leiden_0.4.3.1          codetools_0.2-20       
##  [40] DelayedArray_0.30.1     xml2_1.3.6              tidyselect_1.2.1       
##  [43] UCSC.utils_1.0.0        farver_2.1.2            spatstat.explore_3.3-2 
##  [46] jsonlite_1.8.8          progressr_0.14.0        ggridges_0.5.6         
##  [49] survival_3.7-0          systemfonts_1.1.0       tools_4.4.1            
##  [52] ragg_1.3.3              ica_1.0-3               Rcpp_1.0.13            
##  [55] glue_1.7.0              gridExtra_2.3           SparseArray_1.4.8      
##  [58] xfun_0.47               withr_3.0.1             BiocManager_1.30.25    
##  [61] fastmap_1.2.0           fansi_1.0.6             SparseM_1.84-2         
##  [64] digest_0.6.37           timechange_0.3.0        R6_2.5.1               
##  [67] mime_0.12               textshaping_0.4.0       colorspace_2.1-1       
##  [70] scattermore_1.2         tensor_1.5              spatstat.data_3.1-2    
##  [73] utf8_1.2.4              generics_0.1.3          renv_1.0.7             
##  [76] httr_1.4.7              htmlwidgets_1.6.4       S4Arrays_1.4.1         
##  [79] uwot_0.2.2              pkgconfig_2.0.3         gtable_0.3.5           
##  [82] lmtest_0.9-40           XVector_0.44.0          htmltools_0.5.8.1      
##  [85] dotCall64_1.1-1         scales_1.3.0            png_0.1-8              
##  [88] spatstat.univar_3.0-1   rstudioapi_0.16.0       tzdb_0.4.0             
##  [91] reshape2_1.4.4          nlme_3.1-166            cachem_1.1.0           
##  [94] zoo_1.8-12              KernSmooth_2.23-24      parallel_4.4.1         
##  [97] miniUI_0.1.1.1          pillar_1.9.0            vctrs_0.6.5            
## [100] RANN_2.6.2              promises_1.3.0          xtable_1.8-4           
## [103] cluster_2.1.6           evaluate_1.0.0          cli_3.6.3              
## [106] locfit_1.5-9.10         compiler_4.4.1          rlang_1.1.4            
## [109] crayon_1.5.3            future.apply_1.11.2     labeling_0.4.3         
## [112] plyr_1.8.9              stringi_1.8.4           viridisLite_0.4.2      
## [115] deldir_2.0-4            BiocParallel_1.38.0     munsell_0.5.1          
## [118] lazyeval_0.2.2          spatstat.geom_3.3-3     quantreg_5.98          
## [121] Matrix_1.7-0            RcppHNSW_0.6.0          hms_1.1.3              
## [124] future_1.34.0           shiny_1.9.1             highr_0.11             
## [127] ROCR_1.0-11             igraph_2.0.3            bslib_0.8.0            
## [130] polynom_1.4-1